library(Seurat)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Matrix)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library(scrattch.hicat)
library(patchwork)
## Warning: package 'patchwork' was built under R version 3.5.3
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
## 
##     align_plots
#setwd('~/postdoc2/Gedankenpapers/CNevomanuscript/code/snRNAseq/mouse/processing/')

Load data.

load('../data/rawdata.RData')


p1<-DimPlot(object = N31, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = N31, reduction = 'tsne', label=TRUE,group.by = 'DCN')
p3<-DimPlot(object = N31, reduction = 'tsne', label=TRUE,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

plot number of detected genes and reads per cluster

VlnPlot(object = N31, feature = c("nFeature_RNA", "nCount_RNA"))

Plot maker genes

FeaturePlot(object = N31, features = c("Snap25","Slc17a6","Gad1","Slc6a5","Gabra6"), reduction= "tsne")

Subset data: clusters 1,0,18 gabra6+ clusters 4,9,14,15,17 low genes/counts

dcn<-subset(N31,idents=c(0,1,18,4,9,14,15,17),invert=T)
DimPlot(dcn)

dcn <- FindVariableFeatures(object = dcn,selection.method = "vst",nfeatures = 2000,verbose=FALSE)
dcn <- ScaleData(object = dcn,vars.to.regress = c('nCount_RNA','FACS','nFeature_RNA'))
## Regressing out nCount_RNA, FACS, nFeature_RNA
## Centering and scaling data matrix
#dcn <- RunPCA(object = dcn,npcs = 30,verbose = FALSE)
source('../../helperfunctions/pc_modification_functions_S3_forRNA.R')
## Loading required package: qlcMatrix
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'qlcMatrix'
dcn<-RunTruncatedPCA(dcn,n.genes.pc = 40)
## PC_ 1 
## Positive:  Zfhx3, Pdzd2, Grm7, Slc24a4, Fbxl7, Prkg1, Ptprk, Fxyd6, Foxp2, Ramp3 
##     Grm3, Asic2, Syt6, Coch, Trpm3, Gabrb1, Kcnip1, Fbln5, Cdh4, Gm40999 
##     Drd2, Grid2, Prr16, Ncald, Adarb2, Prex2, Glra3, Sorcs3, Trp63, Mal2 
## Negative:  Pde4b, Nfib, St18, Cdh20, Zfpm2, Gjc3, Zeb2, Rgs6, 1700047M11Rik, Ptprz1 
##     Tmtc2, Kit, Plxnb3, Mgat4c, Neat1, Npas3, Sec14l5, Sox2ot, Ttyh2, Cmtm5 
##     Mag, Slc6a5, Igsf11, Synpr, Cldn11, Fa2h, Tspan15, Stxbp2, Aspa, Cdh19 
## PC_ 2 
## Positive:  Rgs6, Zfpm2, Mgat4c, Kit, Ptprz1, Nfib, Slc6a5, Rbfox1, Nxph1, Tmtc2 
##     Synpr, Npas3, Dab1, Slc36a2, Penk, Trim67, Hs3st4, Shisa9, Kcnip4, Plcb1 
##     Pde4b, Gm3764, Hs6st3, Stxbp2, Map3k8, Afap1l2, Kcnn3, Apcdd1, Hs3st2, Adrb2 
## Negative:  Ptprk, Grm7, Gjc3, Zfhx3, 1700047M11Rik, Neat1, Plxnb3, Sec14l5, Mog, Prr5l 
##     Sox2ot, Slc24a4, Mag, St6galnac3, Cdh19, Pdzd2, Cldn11, St18, Aspa, Prkg1 
##     Grm3, Fbxl7, Tspan2, ENSMUSG00000027199, Igsf11, Cmtm5, Foxp2, Bcas1, Mobp, Fxyd6 
## PC_ 3 
## Positive:  Sec14l5, 1700047M11Rik, Aspa, Mog, St18, ENSMUSG00000111219, Mag, Cldn11, Mobp, Prr5l 
##     E330020D12Rik, Tnfaip6, Lpar1, Rasgrp3, Fa2h, Gm44866, Tspan15, Gas7, Nipal4, Hapln2 
##     Gjc3, Tnni1, Plxnb3, Cldn14, A230001M10Rik, Insc, Anln, Opalin, Apod, Rnf220 
## Negative:  Stk32a, Slc39a12, Slc38a3, Cd38, Slc4a4, Aqp4, Sox6, Etnppl, Acss3, Cyp2j9 
##     Gja1, Ntsr2, Gpr37l1, Pdgfd, Bmpr1b, Gjb6, A330076C08Rik, Aldh1a1, Gm6145, Acsbg1 
##     S1pr1, Slc7a10, Ednrb, F3, Rorb, Slc1a2, Pax3, Egfr, Serpine2, Agt 
## PC_ 4 
## Positive:  Spp1, Cpne4, Unc5d, Kcnip4, Gpc5, D630045J12Rik, Rbfox1, Zfp804b, Nrg1, Grik1 
##     Fam19a2, Itpr1, Nell2, Adamts6, Zfyve28, Dab1, Kirrel3, Col24a1, Pld1, Chrna4 
##     Pde4b, Plpp3, Gm10801, Ninl, Zpbp, Gm26917, Slitrk6, Kank1, Gpr83, Epb41l2 
## Negative:  Adarb2, Shisa9, Slc6a5, Kit, Plcb1, Nxph1, Synpr, Syndig1, Ptprk, Grm3 
##     Asic2, Ptprz1, Slc24a4, Igfbp2, Kcnn3, Tmtc2, Fam107b, Ust, Slc36a2, Zfhx3 
##     Grid2, Apcdd1, Map3k8, Fxyd6, Plppr4, Syt6, Penk, Zfpm2, Ramp3, Nfib 
## PC_ 5 
## Positive:  Kirrel3, Gria1, Zfp804b, Gm38048, Mgat4c, Fam19a2, Car8, Trabd2b, Ryr1, Sst 
##     Calb1, Prkg1, Mctp1, Unc5d, Gabrb1, Grik1, Grm7, Casq2, Nckap5, Gm28928 
##     Iltifb, Gabrg3, Stac, Cacng3, Myo10, Nell2, Cck, Col24a1, Fbn2, Prex2 
## Negative:  Spp1, Adarb2, Fbxl7, Pdzd2, Slc24a4, Stxbp2, Ramp3, Grm3, Zfhx3, Syt6 
##     Gm3294, Coch, Zfyve28, Itpr1, Tox2, Pld1, Plpp3, Aqp4, Gpr83, Etnppl 
##     Acsbg1, Itpr2, Zpbp, Hs6st3, Stk32a, Slc39a12, Slc7a10, Cd38, Gjb6, Qk
ElbowPlot(object = dcn,ndims = 30)

usefuldims=1:26
dims.remove=c(10,12,13,18,19,21,23,24,25)
usefuldims=usefuldims[!usefuldims %in% dims.remove]
dcn<- FindNeighbors(dcn,dims=usefuldims)
## Computing nearest neighbor graph
## Computing SNN
dcn <- FindClusters(object = dcn, reduction = "pca", dims = usefuldims, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4988
## Number of edges: 201872
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6724
## Number of communities: 19
## Elapsed time: 0 seconds
dcn <- RunTSNE(object = dcn, dims = usefuldims, perplexity=30, dim.embed = 2,seed.use = 2)


p1<-DimPlot(object = dcn, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = dcn, reduction = 'tsne', label=TRUE,group.by = 'DCN')
p3<-DimPlot(object = dcn, reduction = 'tsne', label=TRUE,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

VlnPlot(dcn,features=c("nFeature_RNA", "nCount_RNA"))

Figure out which dimensions are actually useful for clustering.

#FeaturePlot(dcn, features=paste0("PC_", 1:30),cols = c("navy", "olivedrab1"))

plot maker genes

FeaturePlot(object = dcn, features = c("Snap25","Slc17a6","Gad1","Slc6a5","Gabra6"), reduction= "tsne")

let’s look only at the excitatory neurons.

ex<-subset(dcn,idents=c(13,6,10,4,5,8,1),invert=F)
DimPlot(ex)

ex <- FindVariableFeatures(object = ex,selection.method = "vst",nfeatures = 2000,verbose=FALSE)
ex <- ScaleData(object = ex,vars.to.regress = c('nCount_RNA','FACS','nFeature_RNA'))
## Regressing out nCount_RNA, FACS, nFeature_RNA
## Centering and scaling data matrix
ext<-RunTruncatedPCA(ex,dim=1:30)
## PC_ 1 
## Positive:  Grm7, Celf4, Prkg1, Kirrel3, Fnbp1l, Gsg1l, Dscaml1, March1, Snca, Kcnb2 
##     Syt16, Luzp2, Kcnq5, Chl1, Mgat4c, Slc6a1, Zfp804b, 6330403K07Rik, Cacna1e, Shisa9 
##     Unc5c, Sema6d, Atp2b4, Lmx1a, Fam19a2, Fxyd6, Dcc, Mal2, Pkib, Ablim3 
## Negative:  Spp1, Crhr1, Peak1, Kcnip4, Hs6st3, Stxbp2, Fign, Zfpm2, Galnt18, Fam110b 
##     Gpc6, Zfyve28, Adarb2, Tcerg1l, Pld1, Fignl2, L3mbtl4, Sgpp2, Gm26917, Zpbp 
##     Sgcd, Cpne4, Exog, Cnksr2, Slmap, Slc7a2, Opa1, Trpm2, Bmper, Anxa3 
## PC_ 2 
## Positive:  Sorcs3, Cntn4, Kynu, Cacna1g, Fstl5, Sulf1, Cpne4, Ankfn1, March1, Gabrg3 
##     Fign, Necab1, Pde1a, Lamb1, Adamtsl1, Sst, Adcy2, Crhr1, Gal, Gm28928 
##     6330403K07Rik, Galnt18, Fxyd6, Peak1, Sntg2, Tcerg1l, Prr16, Cpne6, Cacng3, Zfp521 
## Negative:  Cntnap4, Lmx1a, Kcnc2, Nrg1, Gm45904, Grm7, Thsd7b, Mgat4c, Fam19a2, Gpc5 
##     Zfp804b, Unc5c, Nox4, Nrxn3, Col24a1, Ndst4, Kcnip4, Prkg1, Alk, Gm11309 
##     Htr2a, Dab1, Gm13974, Kirrel3, Ecel1, Cd36, Penk, Grm3, Adamts6, Sema3e 
## PC_ 3 
## Positive:  Nfib, Nxph1, Asic2, Prr16, Sgcd, Chrm2, Hs6st3, Gm32647, Gfra1, Dlgap2 
##     L3mbtl4, Glis3, Ptprk, Gm10800, Ano1, Ndst4, Ankfn1, Thsd7b, Gm10801, Penk 
##     Foxp2, Zeb2, Htr2a, Ust, Zfp521, Zfpm2, Sox6, Col24a1, Hs3st4, Syndig1 
## Negative:  Trpm3, Galntl6, Zfp804b, Atp1a3, Dscaml1, Cpne4, CT010467.1, Chl1, Scg2, Cpne6 
##     Syngr3, Aldoc, Cst3, Txndc15, Tcerg1l, Sntg2, Nox4, Fnbp1l, Mfsd5, Gsg1l 
##     Gpc6, Fign, Pld3, Ecel1, Ablim3, Tram1l1, Serpina9, Napa, P4hb, Gstm5 
## PC_ 4 
## Positive:  Grik1, Trpm3, Cpne4, Ano3, Sulf1, Gm10801, Greb1, Gm10800, Lama2, Zfpm2 
##     Kcnc2, Nox4, Dab1, Cacna1e, Fam107b, Zfp804b, Gpc5, Chl1, Sema3e, Pde1a 
##     Syndig1, Trabd2b, Gm13974, Gm28928, Adcy2, Sdk1, Mgat4c, Prlr, Nckap5, Sox6 
## Negative:  Asic2, Atp1a3, CT010467.1, Lmx1a, Psap, Col24a1, Fxyd6, Gm32647, Thsd7b, Scg2 
##     Unc5d, Pkib, L3mbtl4, Nfib, Spp1, Atp2b4, Pld3, 6330403K07Rik, Crhbp, Mfsd5 
##     Aldoc, Cdh4, Serpina9, Mboat7, Luzp2, Tram1l1, Ndufs7, Napa, Cacybp, Rcn1 
## PC_ 5 
## Positive:  Gm10800, Hpse2, Gm10801, Lmx1a, Lama2, Gm20754, Synpr, Prkg1, Scn3b, Gm32647 
##     Myo1b, Fxyd6, Atp2b4, Casd1, Bnc2, Slc18a2, Fbn2, Col24a1, Insc, Otoa 
##     Tpd52l1, Gm19965, Crhbp, Ush2a, Prrg1, Kcnn3, Gm42397, Asic2, Fbln1, Col5a2 
## Negative:  CT010467.1, Atp1a3, Grik1, Ano3, Nxph1, Trpm3, Dpp10, Kcnip4, Dcc, Afdn 
##     Prr16, Sez6l, Mctp1, Ndst4, Dab1, Sgpp2, Gfra1, Kcnb2, Sulf1, Ncoa2 
##     Cacna1e, Slc6a1, Cntn4, Sema6d, Adcy2, Sntg2, Nfib, Ankfn1, Zfp521, Sdk1
ElbowPlot(object = ext,ndims = 30)

#ext <- JackStraw(object = ext, dims=30)
#ext<- ScoreJackStraw(ext,dims=1:30)
#JackStrawPlot(object = ext,dims=1:30)
usefuldims=1:18
dims.remove=c(17)
dims.remove=c()

usefuldims=usefuldims[!usefuldims %in% dims.remove]

replot with dropped pcs

ext<- FindNeighbors(ext,dims=usefuldims)
## Computing nearest neighbor graph
## Computing SNN
ext <- RunTSNE(object = ext, dims = usefuldims, perplexity=30, dim.embed = 2)
ext <- FindClusters(object = ext, reduction.type = "pca", dims.use = usefuldims, resolution = 2, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2298
## Number of edges: 88158
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6727
## Number of communities: 18
## Elapsed time: 0 seconds
p1<-DimPlot(object = ext, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = ext, reduction = 'tsne', label=TRUE,group.by = 'DCN')
p3<-DimPlot(object = ext, reduction = 'tsne', label=TRUE,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

merge clusters using allen package.

rd.dat <- t(GetAssayData(object = ext, slot = "scale.data"))

merge.param <- de_param(padj.th = 0.05, 
                     lfc.th      = 1, 
                     low.th      = 1, 
                     q1.th       = 0.4, 
                     q.diff.th   = 0.5, 
                     de.score.th = 40)

merge.result <- merge_cl(as.matrix(GetAssayData(object = ext, slot = "data")), 
                         cl = ext$RNA_snn_res.2, 
                         rd.dat = rd.dat,
                         de.param = merge.param)
## Loading required package: limma
ext$merged.res.2<-as.factor(paste('ex_',merge.result$cl,sep=''))
p2<-DimPlot(ext,group.by = "merged.res.2",label=T)
p1<-DimPlot(ext,group.by = "RNA_snn_res.2")
p3<-DimPlot(ext,group.by = 'DCN')
plot_grid(p1,p2,p3,ncol=3)

Idents(ext)<-'merged.res.2'
p2<-DimPlot(ext,group.by = "merged.res.2")
p1<-DimPlot(ext,group.by = "FACS")
p3<-DimPlot(ext,group.by = 'DCN')
plot_grid(p2,p3,p1,ncol=3)

#save(file='ex_celltypes.RData',ext)

now look at non-glycinergic inhibitory cell types

inh<-subset(dcn,idents=c(0,2,9),invert=F)

DimPlot(inh)

inh <- FindVariableFeatures(object = inh,selection.method = "vst",nfeatures = 2000,verbose=FALSE)
inh <- ScaleData(object = inh,vars.to.regress = c('nCount_RNA','FACS','nFeature_RNA'))
## Regressing out nCount_RNA, FACS, nFeature_RNA
## Centering and scaling data matrix
source('../../helperfunctions/pc_modification_functions_S3_forRNA.R')
## Loading required package: qlcMatrix
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'qlcMatrix'
inh<-RunTruncatedPCA(inh,n.genes.pc = 40)
## PC_ 1 
## Positive:  Ptprt, Malat1, Lgr5, Angpt1, Cd226, Arhgap15, Mgat4c, Cryzl2, Npsr1, Kcnh8 
##     ENSMUSG00000039840, Abi3bp, Pik3c2g, Slco4a1, Arhgap28, Cdc14a, Esr1, Pde4b, Spata13, Nectin3 
##     Adcy2, Ust, Gm28928, Cep128, Cpne8, Btk, Prdm1, Chst7, Gm15983, Sncaip 
## Negative:  CT010467.1, Ndrg4, Syvn1, Gm26917, Adcy1, Peg3, Psap, Mdm4, Tm9sf2, Map2k5 
##     Arsa, Ncald, Appl2, Snph, Sez6l2, Pisd, Entpd6, Pfkfb3, Dhcr7, Slc25a1 
##     Clcn7, Cyp51, Hsd17b7, Slc4a3, Hivep1, Timm50, Rac1, Lnx1, Gpaa1, Naxd 
## PC_ 2 
## Positive:  Tenm2, Grm8, CT010467.1, Cadps2, Cacng3, Igfbp5, Gfra1, Anxa11, Bscl2, Bves 
##     Igfbp4, Mgat4c, Rac1, Pigt, Stc1, Slc19a1, Slc15a2, Odf3b, Pmm1, Pde4b 
##     Adgrg1, Serpinf1, Slc25a39, Disp3, Arhgef26, Arhgap28, Cptp, Pcsk2, Thumpd1, Mpp6 
## Negative:  Kcnc2, Plcl1, Klhl29, Rbfox1, Kcnip4, Ccbe1, Prkca, Shisa9, Galnt18, Gm35721 
##     Fam19a2, Plekha7, Prkd1, Myo18b, Rxfp1, Slit3, Grin2a, Neurl1b, Map2k5, Cyp4f18 
##     Rspo3, Pmfbp1, Fam81a, Lrmda, Lrriq1, Il1rapl2, Parn, Ly96, 2310002F09Rik, Pfkfb3 
## PC_ 3 
## Positive:  Ntng1, Rmst, Gfra1, Tenm2, Taf13, Tor3a, Rbfox1, Ptprt, Fam19a2, Nubpl 
##     Mgat4c, Arhgef26, Il1rapl2, Grm8, Chn2, Grin2a, Adcy2, Irf5, Vcl, Gramd1c 
##     Wipf1, Gm15601, D630003M21Rik, Trp53rka, Dlgap2, Lrmda, Smc2, Pde4b, Pld5, Gm15983 
## Negative:  Klhl29, Lax1, Chrna4, Slc7a3, Tor1a, Fam163b, Kcnc2, Gstm1, Rpl22, Fam192a 
##     Slc35b1, Mfsd3, Prkd1, Tdrkh, Gnb3, Doc2a, D930016D06Rik, Nkain4, Ada, Ppp4r1l-ps 
##     Asrgl1, Ubl3, Lss, Pgd, Xkr8, Ninj1, Znhit6, Btbd19, Golga7b, Kcnip4 
## PC_ 4 
## Positive:  Il1rapl2, Rmst, Fam19a2, Tenm2, Kcnip4, Kcnc2, Serpinf1, Cadps2, Rbfox1, Rxfp1 
##     Ctnna1, Syvn1, Myo18b, Cdh11, Nprl3, Gstm4, 4931423N10Rik, Fam163b, Gm12371, Col4a2 
##     Mcfd2, Mgat4c, Sf3b3, Fam192a, Ifngr1, Stat2, Ntng1, 1810058I24Rik, Tonsl, 4931406C07Rik 
## Negative:  Gm32647, Klhl29, 2900079G21Rik, Myt1, Tmem161a, Rcc1, Arhgef26, Tefm, Tbc1d4, Tars 
##     Spata9, Anxa11, Zmiz1os1, Hmgxb3, Pantr1, Gprin1, Cngb1, Dlgap2, Naf1, Mfsd5 
##     Doc2a, 1700102P08Rik, Pik3r6, Caskin1, Myh9, Snph, Slc5a5, Drd2, Smpdl3a, Dcbld2 
## PC_ 5 
## Positive:  C230088H06Rik, Gfra1, Tfdp1, 2310002F09Rik, Galnt18, Malat1, D030028A08Rik, Gm32647, Pmfbp1, Arhgef26 
##     Zfp872, Klhl29, Lnx1, Hic2, 4931415C17Rik, Timmdc1, Ddx3x, Mad2l1, Gm43848, Xrcc5 
##     Mpp6, Otulin, Uap1l1, Fam210a, Camkk1, Airn, Spry4, Fto, Cxxc1, Adat1 
## Negative:  Ly96, Gm17949, Nek3, Mfsd12, Serpinf1, Ccdc65, Tprkb, Ret, Opn3, Smpdl3a 
##     Serpine1, CT010467.1, Tm4sf1, ENSMUSG00000015980, Btk, Dlec1, Tubg1, Gm35721, Il21r, Msto1 
##     Cryzl2, Mettl1, Gm6859, Gm830, D530049I02Rik, Adcy2, Chrna4, Smyd5, Rdh11, Spp1
ElbowPlot(object = inh)

#inh <- JackStraw(object = inh, dims=20)
#inh <- ScoreJackStraw(inh,dims=1:20)
#JackStrawPlot(object = inh,dims=1:20)
usefuldims=1:4
dims.remove=c(1,3)
usefuldims=usefuldims[!usefuldims %in% dims.remove]
inh<- FindNeighbors(object = inh,dims=usefuldims)
## Computing nearest neighbor graph
## Computing SNN
inh<- FindClusters(object = inh, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1401
## Number of edges: 31880
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7305
## Number of communities: 22
## Elapsed time: 0 seconds
inh <- RunTSNE(object = inh, dims = usefuldims, perplexity=30, dim.embed = 2)

p1<-DimPlot(object = inh, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = inh, reduction = 'tsne', label=TRUE,group.by = 'DCN')
p3<-DimPlot(object = inh, reduction = 'tsne', label=TRUE,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

#FeaturePlot(inh, features=paste0("PC_", 1:4),cols = c("navy", "olivedrab1"))

merge clusters using allen package.

rd.dat <- t(GetAssayData(object = inh, slot = "scale.data"))

merge.param <- de_param(padj.th     = 0.05, 
                     lfc.th      = 1, 
                     low.th      = 1, 
                     q1.th       = 0.4, 
                     q.diff.th   = 0.6, 
                     de.score.th = 40)

merge.result <- merge_cl(as.matrix(GetAssayData(object = inh, slot = "data")), 
                         cl = inh$RNA_snn_res.2, 
                         rd.dat = rd.dat,
                         de.param = merge.param)

if (is.null(merge.result))
  {inh$merged.res.2<-1
} else {
inh$merged.res.2<-merge.result$cl
}
p2<-DimPlot(inh,group.by = "merged.res.2")
p1<-DimPlot(inh,group.by = "RNA_snn_res.2")
p3<-DimPlot(inh,group.by = 'DCN')
p4<-DimPlot(inh,group.by = 'FACS')

plot_grid(p2,p3,p4,ncol=3)

Idents(inh)<-"merged.res.2"
VlnPlot(inh,features=c("nFeature_RNA", "nCount_RNA"))

plot marker genes

FeaturePlot(object = inh, features = c("Snap25","Slc17a6","Gad1","Slc6a5"), reduction= "tsne")

Now, lets do this for the big group of glycinergic cells

gly<-subset(dcn,idents=c(3,7,15),invert=F)

DimPlot(gly)

gly <- FindVariableFeatures(object = gly,selection.method = "vst",nfeatures = 2000,verbose=FALSE)
gly <- ScaleData(object = gly,vars.to.regress = c('FACS','nFeature_RNA'))
## Regressing out FACS, nFeature_RNA
## Centering and scaling data matrix
source('../../helperfunctions/pc_modification_functions_S3_forRNA.R')
## Loading required package: qlcMatrix
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'qlcMatrix'
gly<-RunTruncatedPCA(gly,n.genes.pc = 40)
## PC_ 1 
## Positive:  Cadm1, Adamtsl1, Spon1, Kirrel3, Trpm3, Ly6d, Syt16, Slc24a3, Unc5d, Cacna1g 
##     Fbn2, Megf10, Dab1, Shisa6, Fxyd6, Cdh1, Gabbr2, Igsf8, Rftn1, Tunar 
##     Gpc5, mt-Rnr2, Eps8, Arhgef28, Plekhg1, Sema4a, Plch2, Cnpy1, Nkain1, Ptpro 
## Negative:  Slc6a5, Kcnip4, Adam23, Penk, Ccbe1, Ano4, Rbms1, Clstn2, Tenm4, Mdh1 
##     Ndrg4, Slc6a12, Adamts3, ENSMUSG00000013539, Gm26917, Ttll5, Lpcat2, Fggy, Ptar1, Ablim2 
##     Rab31, Srgap2, Hdlbp, Nova1, Rbfox1, Arhgef6, Epb41l4b, Fhit, Mrps9, Lrmda 
## PC_ 2 
## Positive:  Slc6a5, Mdh1, Kcnip4, Ccbe1, Itpr3, Map3k21, Penk, Stmn3, Cplx2, Mettl7a1 
##     Tgfb2, Ttr, Gjb1, Gm26597, Tbl3, Gm25127, 2610307P16Rik, Ctns, Rpusd4, Gm20609 
##     Ctxn1, Fyb, Trim28, Tmem53, Bysl, AC156026.1, Gm38155, Kcng2, Gm12240, Gm16283 
## Negative:  Cadm1, Spon1, Kirrel3, Slc24a3, Dab1, Plekhg1, Tnr, Ly6d, Gabbr2, Osbpl10 
##     Cacna1g, Adamts17, Atxn2l, Shisa6, Megf10, Ptprn2, Mical3, Plxna4, Anapc5, Sgcd 
##     Ptch1, Unc5d, Ptpro, Tcerg1l, Adamtsl1, Leng8, Trpm3, Syt16, Nkain1, Trim67 
## PC_ 3 
## Positive:  CT010467.1, Atp1a3, Ndrg4, Stmn3, Ppp2r1a, B4gat1, Psap, Syngr3, Kcnn3, Mdh1 
##     Ckmt1, Cnbp, Dab1, Chpf, Ddost, Rrp1, Sqstm1, Pcbp1, Disp2, Slc39a6 
##     Syvn1, Reep5, Paf1, Atxn2l, Nlgn2, Yif1a, Tmem109, Dcaf7, Adora1, Tunar 
## Negative:  Tenm4, Slit2, Cntnap5c, Asic2, Kcnip4, Bicc1, Rorb, Zfhx3, Zeb2, Cntn5 
##     2610307P16Rik, Il1rapl2, Grin2a, Rab27b, Gpc5, Sgcd, Rsu1, Chst8, Lrmda, Diaph2 
##     Gm2164, Gm20449, Ctnna3, Map3k21, Gpr161, Slc10a7, Rbfox1, Lpcat2, Wwc1, Efl1 
## PC_ 4 
## Positive:  Npas3, Cntnap5a, Dab1, Pkd2, C2cd3, Afap1, Ptprn2, Osbpl3, Etv5, Eml4 
##     Rbfox1, Raph1, Pip4k2a, Fam193a, Manba, Tctn1, Sema4a, Limd2, Wscd1, ENSMUSG00000022253 
##     Asap2, Rnf146, ENSMUSG00000000325, Inpp4a, Nkain1, Fhit, Trpc7, Yaf2, Ptp4a2, Chd7 
## Negative:  Tenm3, Cntnap5c, Pdzrn4, Chsy3, Ebf3, Meis2, Fam19a1, Cntn5, Pdgfb, CT010467.1 
##     Gm6999, Tmem132e, Trim36, Mfsd5, Etnppl, Tle2, Spink4, Clec2j, Card10, Tst 
##     4921539H07Rik, Gm15518, Mdh1, C430002N11Rik, C230012O17Rik, Atp1a3, Syngr3, Mir1969, Ebf1, Asic2 
## PC_ 5 
## Positive:  Asic2, Arhgap6, Zeb2, Pde8b, Tenm4, Dnhd1, Ccbe1, Grin2a, Ssbp3, Slco4a1 
##     Cdh6, Rab3b, Plch2, Tmem28, Tmem74b, Lin9, Xpo5, Tmem231, Syt16, Rorb 
##     Fam131a, Phf12, Errfi1, Pnpla2, Cabyr, Slc6a5, Capns1, Siah3, Paf1, Zfhx3 
## Negative:  Cntnap5a, Tst, Cntnap5c, Il1rapl2, Dab1, Chsy3, Ebf3, Spp1, Kcnn3, Etnppl 
##     Rnf17, Frmd8os, Thsd7b, Fastkd2, Gm6999, Eva1a, Mir1969, Srgap1, Dcst1, Rbfox1 
##     Gm15518, 4921539H07Rik, Tph1, Tle2, C230012O17Rik, Fam160a2, Ankzf1, Glb1, Gm14696, Rny1
ElbowPlot(object = gly)

gly <- JackStraw(object = gly, dims=20)
gly <- ScoreJackStraw(gly,dims=1:20)
JackStrawPlot(object = gly,dims=1:20)

usefuldims=1:5
dims.remove=c()
usefuldims=usefuldims[!usefuldims %in% dims.remove]
gly<- FindNeighbors(object = gly,dims=usefuldims)
## Computing nearest neighbor graph
## Computing SNN
gly<- FindClusters(object = gly, reduction.type = "pca", dims.use = usefuldims, resolution = 2, print.output = 0, save.SNN = TRUE, force.recalc = TRUE,algorithm = 1,n.start = 20)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 851
## Number of edges: 26235
## 
## Running Louvain algorithm...
## Maximum modularity in 20 random starts: 0.5111
## Number of communities: 13
## Elapsed time: 0 seconds
gly <- RunTSNE(object = gly, dims = usefuldims, perplexity=30, dim.embed = 2)

p1<-DimPlot(object = gly, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = gly, reduction = 'tsne', label=TRUE,group.by = 'DCN')
p3<-DimPlot(object = gly, reduction = 'tsne', label=TRUE,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

merge clusters using allen package.

rd.dat <- t(GetAssayData(object = gly, slot = "scale.data"))

merge.param <- de_param(padj.th     = 0.05, 
                     lfc.th      = 1, 
                     low.th      = 1, 
                     q1.th       = 0.4, 
                     q.diff.th   = 0.6, 
                     de.score.th = 40)

merge.result <- merge_cl(as.matrix(GetAssayData(object = gly, slot = "data")), 
                         cl = gly$RNA_snn_res.2, 
                         rd.dat = rd.dat,
                         de.param = merge.param)

if (is.null(merge.result))
  {gly$merged.res.2<-1
} else {
gly$merged.res.2<-merge.result$cl
}
p2<-DimPlot(gly,group.by = "merged.res.2")
p1<-DimPlot(gly,group.by = "RNA_snn_res.2")
p3<-DimPlot(gly,group.by = 'DCN')
p4<-DimPlot(gly,group.by = 'FACS')

plot_grid(p2,p3,p4,ncol=3)

#FeaturePlot(gly, features=paste0("PC_", 1:10),cols = c("navy", "olivedrab1"))
Idents(gly)<-"merged.res.2"

Now let’s integrate these blobs of inhibitory or glycinergic cells with the smaller distinct groups of those cells and save out the cell types.

gly.complete<-subset(dcn,idents = c(3,7,15,12,16))
#annotate
prelim.clusters<-as.data.frame(gly.complete$RNA_snn_res.2)
prelim.clusters$`gly.complete$RNA_snn_res.2`<-as.character(prelim.clusters$`gly.complete$RNA_snn_res.2`)

prelim.clusters[names(gly$merged.res.2),]<-paste('gly_',as.character(gly$merged.res.2))
prelim.clusters$`gly.complete$RNA_snn_res.2`<-as.factor(prelim.clusters$`gly.complete$RNA_snn_res.2`)
colnames(prelim.clusters)<-'prelim.clusters'
gly.complete$prelim.clusters<-(prelim.clusters)

Idents(gly.complete)<-'prelim.clusters'
DimPlot(gly.complete)

gly.complete <- FindVariableFeatures(object = gly.complete,selection.method = "vst",nfeatures = 2000,verbose=FALSE)
gly.complete <- ScaleData(object = gly.complete,vars.to.regress = c('nCount_RNA','FACS','nFeature_RNA'))
## Regressing out nCount_RNA, FACS, nFeature_RNA
## Centering and scaling data matrix
gly.complete<-RunTruncatedPCA(gly.complete,n.genes.pc = 40)
## PC_ 1 
## Positive:  Penk, Ccbe1, Jup, Slc36a2, Npas3, Trim67, Stxbp2, Etv5, Shisa9, Apcdd1 
##     Cdh6, Runx2, Lrrn1, Gfra2, Lpcat2, Slc6a12, Slc6a5, Ptar1, Atp1a3, Ndrg4 
##     Adrb2, Aqp6, Osbpl3, Tbc1d9, Hs3st4, Zyx, Tmem132a, Stmn3, Nuak2, Hs3st2 
## Negative:  Grm7, Ptprk, Unc5d, Edil3, Fstl5, Egfem1, Gm26871, Tenm3, Dpp10, Gria1 
##     Alcam, Dcc, Alk, Ebf3, Pcdh9, Gabrg3, Cacng3, Grik1, Gabrb1, Nell1 
##     Cntn5, Fam19a2, Kcnj6, Cadm1, Nckap5, Grm8, Megf11, Kcnk9, Sst, Frmpd3 
## PC_ 2 
## Positive:  Shisa9, Nckap5, Gabrg3, Megf11, Gria1, Gm20754, Sst, Alk, Slc16a2, Cacng3 
##     Edil3, Kcnh7, Arhgap6, Cnih3, Cdh4, Plpp4, Slc30a3, Alcam, Syt16, Slc6a11 
##     Gap43, Pxdn, Fam126a, Slit1, Fam107b, Dcc, Gabrb1, Litaf, Gpd1l, Cidea 
## Negative:  Grik1, Slit2, Fam19a2, Zfhx4, Pde9a, Cntnap5c, AC124532.2, Grm8, Kcnk9, Egfem1 
##     Lrfn2, Tenm3, Adamts17, Nell1, Cacna1i, Grm4, Il1rapl2, Olfml2b, B3gat1, Zfp804b 
##     Skap2, Colq, Kcng4, Hydin, Lrmda, St6gal1, Sema3e, D630045J12Rik, Gpc5, Hhatl 
## PC_ 3 
## Positive:  Pcdh15, Slc6a5, Tenm4, Kcnip4, Syt1, Alk, Ptprk, Megf11, Adam23, Gabrg3 
##     Edil3, Pcdh9, Rbfox1, Ror1, Gm20754, Cntn5, Asic2, Dcc, Adamts3, Gpc6 
##     Dlg2, Ttll5, Gria1, Kcnh7, Ebf3, Penk, Slit3, Cacng3, Ccbe1, Ppargc1b 
## Negative:  Dab1, Spon1, Cadm1, Ly6d, Kirrel3, Cdh1, Tspan18, Calca, Tox, Arhgef28 
##     Syt16, Adamtsl1, Megf10, Eps8, Nkain1, mt-Rnr2, Chrna6, Pde1a, Slc6a2, Th 
##     Syt5, Fbn2, Cacna2d1, Tgfa, Slc7a3, Wdr72, Wls, Fxyd6, Ptpro, Slc16a12 
## PC_ 4 
## Positive:  CT010467.1, Atp1a3, Ndrg4, Ppp2r1a, Syngr3, Atxn2l, Ckmt1, Leng8, Hnrnpk, B4gat1 
##     Fxyd6, Disp2, Slc30a3, Dab1, Reep5, Acp2, Psap, Cnbp, Sgta, Fam163b 
##     Cars, Kcnn3, Stmn3, Trim67, Dcaf7, Nlgn2, Rab34, Anapc5, Syp, Tmub2 
## Negative:  Map3k21, Wdr72, Pcdh15, Gpc6, 2610307P16Rik, Litaf, Zfhx3, Afap1, Kcnip4, Zfp804b 
##     Rxfp1, Fgd5, Gm15477, Prr5l, Syt1, Npas3, Dlg2, Slc38a3, Grm7, Tenm4 
##     Dcc, Mettl7a1, Hydin, 4430402I18Rik, Alk, Slit1, Scn7a, ENSMUSG00000032648, 2310039L15Rik, Lamc2 
## PC_ 5 
## Positive:  Tspan18, Wls, Th, Slc6a2, Chrna6, Cdh8, Tox, Calca, Cacna2d1, Dubr 
##     Disp3, Ctxn1, Fam159b, Irgm1, Kcnip4, Fam163b, Fibcd1, Plin5, Gpx3, Vwa5b1 
##     Lamc2, Chrna3, 4931415C17Rik, Cntnap5c, Gm45592, Zc2hc1c, Pcdh15, Gm40348, Wdr72, Gm5455 
## Negative:  Kirrel3, Ly6d, Cadm1, Adamtsl1, Dab1, Trpm3, Spon1, Megf10, Wscd1, Cnpy1 
##     Nol4, Pcdh9, Ptprk, Cdh1, Plekhg1, Inpp5a, Dpp10, Adamts17, Gpc5, Ebf3 
##     Nkain1, Sst, Tcerg1l, Fbn2, Plpp4, Gabrg3, Sema4a, Sergef, Shc4, Rin2
ElbowPlot(object = gly.complete)

usefuldims=1:10
dims.remove=c()
usefuldims=usefuldims[!usefuldims %in% dims.remove]
gly.complete<- FindNeighbors(object = gly.complete,dims=usefuldims)
## Computing nearest neighbor graph
## Computing SNN
gly.complete <- RunTSNE(object = gly.complete, dims = usefuldims, perplexity=30, dim.embed = 2)

p1<-DimPlot(object = gly.complete, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = gly.complete, reduction = 'tsne', label=TRUE,group.by = 'DCN')
p3<-DimPlot(object = gly.complete, reduction = 'tsne', label=TRUE,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

rename idents

gly.complete <- RenameIdents(object = gly.complete, `gly_ 0` = "gly_0")
gly.complete <- RenameIdents(object = gly.complete, `gly_ 1` = "gly_1")
gly.complete <- RenameIdents(object = gly.complete, `gly_ 4` = "gly_2")
gly.complete <- RenameIdents(object = gly.complete, `12` = "gly_3")
gly.complete <- RenameIdents(object = gly.complete, `16` = "gly_4")

gly.complete[["prelim.clusters"]] <- Idents(object = gly.complete)


p1<-DimPlot(object = gly.complete, reduction = 'tsne', label=TRUE)
p2<-DimPlot(object = gly.complete, reduction = 'tsne', label=F,group.by = 'DCN')
p3<-DimPlot(object = gly.complete, reduction = 'tsne', label=F,group.by = 'FACS')
plot_grid(p1,p2,p3,ncol = 3)

FeaturePlot(gly.complete,c("Slc17a6","Gad1","Slc6a5","Slc6a1"))

Now let’s port the subclustering results back to the integrative dcn dataset

final.clusters<-as.data.frame(dcn$RNA_snn_res.2)
final.clusters$`dcn$RNA_snn_res.2`<-as.character(final.clusters$`dcn$RNA_snn_res.2`)
final.clusters[names(ext$merged.res.2),]<-paste('',as.character(ext$merged.res.2),sep='')
final.clusters[names(inh$merged.res.2),]<-paste('inh_',as.character(inh$merged.res.2),sep='')
final.clusters[names(gly.complete$prelim.clusters),]<-paste('',as.character(gly.complete$prelim.clusters),sep='')
final.clusters$`dcn$RNA_snn_res.2`<-as.factor(final.clusters$`dcn$RNA_snn_res.2`)
colnames(final.clusters)<-'final.clusters'
dcn$final.clusters<-(final.clusters)
p1<-DimPlot(dcn,group.by = "final.clusters",pt.size = 1,label = T)
p2<-DimPlot(dcn,group.by = 'DCN',pt.siz=1)
p3<-DimPlot(dcn,group.by = 'FACS')
plot_grid(p1,p2,ncol = 2)

FeaturePlot(dcn,c("Snap25","Slc17a6","Gad1","Slc6a5"))

Idents(dcn)<-'final.clusters'
VlnPlot(dcn,c('nFeature_RNA','nCount_RNA'),ncol = 1)